This script is used for modeling the pH of a particular geochemical system

In [1]:
import pickle
import numpy as np
import pandas as pd
import os
import seaborn as sns
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error,mean_squared_error,r2_score

## The following are the ML models which can be used for training
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern
from sklearn.linear_model import LinearRegression

from sklearn.preprocessing import MinMaxScaler,StandardScaler

import timeit
import warnings
warnings.filterwarnings("ignore")
In [2]:
import matplotlib.pyplot as plt
import matplotlib as mpl

import numpy as np
%matplotlib inline
import pandas as pd
In [3]:
import plotly.express as px
In [4]:
import seaborn as sns
# Apply seaborn's default theme with the "darkgrid" axes style to subsequent matplotlib figures.
sns.set(style="darkgrid")
In [5]:
# Switch seaborn to its 'talk' plotting context (a preset that scales fonts and line widths).
sns.set_context('talk')
In [6]:
# Folder containing the input data files.
# NOTE(review): machine-specific absolute path; judging by its name it holds PHREEQC
# training sets. Edit this to run the notebook on another machine.
data_dir =r'E:\projects\MLChemicalR\uranium\20_training_sets\30_phreeqc'
# Keep only entries whose name ends in '.dat'. A plain substring test ('dat' in name)
# would also match unrelated entries such as 'metadata.txt', and downstream code derives
# dictionary keys by dropping the file extension.
# sorted() makes the order deterministic; os.listdir returns entries in arbitrary order.
datafiles = sorted(fl for fl in os.listdir(data_dir) if fl.endswith('.dat'))
In [7]:
# 'output' sub-folder of the data directory (presumably where results are saved — no
# write is visible in this part of the notebook).
out_dir = os.path.join(data_dir,'output')
# exist_ok=True keeps the cell safe to re-run and removes the check-then-create race of
# `if not os.path.exists(...): os.mkdir(...)`; makedirs also creates missing parents.
os.makedirs(out_dir, exist_ok=True)
In [8]:
# Show the list of data files selected from data_dir.
datafiles
Out[8]:
['10_PMU_02_LHS_500000_54854_02_t_P.dat',
 '10_PMU_02_LHS_50000_54854_02_t_P.dat',
 '10_PMU_02_LHS_5000_54854_02_t_P.dat',
 '10_PMU_02_LHS_500_54854_02_t_P.dat']
In [9]:
# Read every selected file into a DataFrame and collect them in a dict keyed by the
# file name without its extension.
dataset_all={}
for file in datafiles:
    InsFile = os.path.join(data_dir, file)
    # Files are tab-separated text.
    data = pd.read_csv(InsFile,sep ='\t')
    # Remove leading/trailing whitespace from header names so columns can be addressed
    # by their plain names (e.g. 'pH', 'totU').
    data.columns =[col.strip() for col in data.columns]
    # Drop the last row and the last column.
    # NOTE(review): presumably a trailing tab produces an empty final column and the
    # final row is not wanted — confirm against the raw files.
    # .copy() detaches the result from the frame returned by read_csv, so later cells
    # can add columns to it without writing to a slice (SettingWithCopy behaviour that
    # the global warnings filter would otherwise hide).
    data =data.iloc[:-1,:-1].copy()
    # splitext removes the extension whatever its length (same key as before for *.dat).
    dataset_all[os.path.splitext(file)[0]] = data
In [10]:
# Pick one dataset to explore (presumably the 50,000-sample set, judging by the key).
# NOTE: this binds the same DataFrame object that is stored in dataset_all (no copy is
# made), so columns added to data50K are also visible through dataset_all.
data50K =dataset_all['10_PMU_02_LHS_50000_54854_02_t_P']
In [11]:
# Derived feature: total base minus total acid (negative when totAcid exceeds totBase).
data50K['diff'] = data50K['totBase'].sub(data50K['totAcid'])
In [12]:
# Bin every sample by its pH relative to 7:
#   1 -> pH < 7, 2 -> pH == 7, 3 -> pH > 7.
# Rows matching none of the tests (e.g. NaN pH) receive np.select's default of 0.
ph_series = data50K['pH']
conditions = [ph_series.lt(7), ph_series.eq(7), ph_series.gt(7)]
# Group code assigned for each condition, in the same order.
values = [1, 2, 3]
data50K['GrouppH'] = np.select(conditions, values)
In [13]:
# Flag whether the 'metaschoepite' column is zero or positive:
#   1 -> value == 0, 2 -> value > 0.
# Rows matching neither test (negative or NaN) receive np.select's default of 0.
meta_amount = data50K['metaschoepite']
conditions = [meta_amount.eq(0), meta_amount.gt(0)]
# Group code assigned for each condition, in the same order.
values = [1, 2]
data50K['GroupMeta'] = np.select(conditions, values)
In [14]:
# Preview the last rows, including the derived 'diff', 'GrouppH' and 'GroupMeta' columns.
data50K.tail()
Out[14]:
mass_H2O totU totAcid totBase pH U_aq U_s U_sc U_ex Kd_s Kd_sc Kd_ex metaschoepite diff GrouppH GroupMeta
49994 0.999894 9.095900e-05 1.000000e-09 1.871300e-02 12.1360 8.897400e-05 1.995000e-06 1.995000e-06 2.309200e-22 22.422 22.42200 2.595400e-15 0.0 0.018713 3 1
49995 1.000060 2.817900e-07 1.000000e-09 8.701300e-04 10.7810 1.221800e-08 2.695800e-07 2.695800e-07 1.414000e-21 22064.000 22064.00000 1.157400e-10 0.0 0.000870 3 1
49996 1.000020 4.389100e-05 1.000000e-09 5.106400e-03 11.5710 4.189600e-05 1.994800e-06 1.994800e-06 1.342100e-20 47.614 47.61400 3.203500e-13 0.0 0.005106 3 1
49997 0.999935 1.016900e-05 1.422900e-02 1.000000e-09 1.9613 9.511300e-06 6.582500e-07 2.240300e-09 6.560100e-07 69.208 0.23555 6.897200e+01 0.0 -0.014229 1 1
49998 0.999928 5.609300e-09 1.493300e-02 1.000000e-09 1.9398 5.246100e-09 3.636500e-10 1.118100e-12 3.625300e-10 69.319 0.21313 6.910500e+01 0.0 -0.014933 1 1
In [15]:
def scatter3d_by_response(frame, z_col, x_col='diff', y_col='totU',
                          color_col='pH', symbol_col='GroupMeta'):
    """Build an interactive 3-D scatter of one response column.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table containing all referenced columns.
    z_col : str
        Column plotted on the z axis (the quantity being inspected).
    x_col, y_col : str
        Columns plotted on the x and y axes.
    color_col : str
        Column that drives the marker colour.
    symbol_col : str
        Column that drives the marker symbol.

    Returns
    -------
    plotly.graph_objects.Figure
        The figure; call ``.show()`` on it to render.
    """
    return px.scatter_3d(
        frame,
        x=x_col,
        y=y_col,
        z=z_col,
        color=color_col,
        symbol=symbol_col,
        # A title lets each figure be identified when several are rendered in a row.
        title=f'{z_col} vs {x_col} and {y_col}',
    )


# Same view for every response column; previously six copy-pasted cells that differed
# only in the z argument. `fig` ends up bound to the last figure, as before.
for z_col in ['pH', 'U_s', 'U_aq', 'U_sc', 'U_ex', 'metaschoepite']:
    fig = scatter3d_by_response(data50K, z_col)
    fig.show()
In [21]:
# List the column names currently on data50K (loaded columns plus the derived ones).
data50K.columns
Out[21]:
Index(['mass_H2O', 'totU', 'totAcid', 'totBase', 'pH', 'U_aq', 'U_s', 'U_sc',
       'U_ex', 'Kd_s', 'Kd_sc', 'Kd_ex', 'metaschoepite', 'diff', 'GrouppH',
       'GroupMeta'],
      dtype='object')
In [22]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]:
 
In [ ]: